import pandas as pd

# Screen the columns of one training table: drop columns that have too few
# distinct values or too many exact zeros, then write the remaining columns
# (plus the always-kept 'ship' and 'type' columns) to '<name>_d0.csv'.

# Input tables available under ./data/ (names given without the .csv suffix).
data_files = ['X_desc', 'trn_per_100_no_d', 'trn_per_300', 'trn_per_100']
file = data_files[0]  # the table processed by this run
trn_data = pd.read_csv(f'./data/{file}.csv', header=0)

# These columns are never screened; they are appended to the output unchanged.
KEEP_COLS = ['ship', 'type']
# A column is dropped when it has fewer than MIN_UNIQUE distinct values ...
MIN_UNIQUE = 5
# ... or when more than MAX_ZEROS of its entries are equal to 0.
MAX_ZEROS = 1000

outfeas = [a for a in trn_data.columns if a not in KEEP_COLS]

fea2 = []  # columns that survive the screening, in original column order
N0 = []  # NOTE(review): not used in the visible code; kept in case later code reads it
for fea in outfeas:
    col = trn_data[fea]
    num_u = col.nunique()
    # Vectorized count of zeros; the builtin sum() would iterate the Series
    # element by element in Python.
    num_0 = (col == 0).sum()
    if num_u < MIN_UNIQUE or num_0 > MAX_ZEROS:
        # Report each rejected column with the statistics that were checked.
        print(fea, num_u, num_0)
    else:
        fea2.append(fea)

print('ori:', len(outfeas), 'res:', len(fea2))
trn3 = trn_data[fea2 + KEEP_COLS]
trn3.to_csv(f'./data/{file}_d0.csv', header=True, index=False)